/******************************************************************************
 * $Header: /boot/home/agmsmith/Programming/MatchUTF8/RCS/MatchUTF8.h,v 1.3 2002/07/27 19:24:47 agmsmith Exp $
 *
 * Wildcard Pattern Matching for the UTF-8 (a Unicode variation) character set.
 *
 * Based on match.h by J. Kercheval, dated from 1991.  Updated for UTF-8
 * character encoding by Alexander G. M. Smith, summer 2002.
 *
 * $Log: MatchUTF8.h,v $
 * Revision 1.3  2002/07/27 19:24:47  agmsmith
 * Added const declarations to string pointers, removed MATCH_UTF8 error
 * since it should be part of MATCH_PATTERN.
 *
 * Revision 1.2  2002/07/25 20:55:51  agmsmith
 * Updated to use new names for UTF-8.
 *
 * A.G.M. Smith  Sat, 07/06/2002  11:18:33  Added UTF-8 character set support.
 * J. Kercheval  Tue, 03/12/1991  22:24:49  Released as V1.1 to Public Domain
 * J. Kercheval  Sun, 03/10/1991  18:47:47  error return from matche()
 * J. Kercheval  Sun, 03/10/1991  18:25:48  add error_type in is_valid_pattern
 * J. Kercheval  Sun, 03/10/1991  18:02:56  add is_valid_pattern
 * J. Kercheval  Wed, 02/20/1991  22:28:37  Released to Public Domain
 */

#ifndef MATCHUTF8__H
#define MATCHUTF8__H

/* Fallback boolean type and truth constants.  Each macro is only defined here
 * when the including code has not already defined a macro of the same name,
 * so an existing project-wide BOOLEAN/TRUE/FALSE takes precedence. */
#ifndef BOOLEAN
  #define BOOLEAN int
#endif
#ifndef TRUE
  #define TRUE 1
#endif
#ifndef FALSE
  #define FALSE 0
#endif

/* match defines - the result codes returned by MatchUTF8e().  Only
 * MATCH_VALID means the text matched; every other value is a failure reason.
 * According to the revision 1.3 log note, UTF-8 errors are reported as
 * MATCH_PATTERN rather than through a separate code. */
#define MATCH_PATTERN  6    /* bad pattern */
#define MATCH_LITERAL  5    /* match failure on literal match */
#define MATCH_RANGE    4    /* match failure on [..] construct */
#define MATCH_ABORT    3    /* premature end of text string */
#define MATCH_END      2    /* premature end of pattern string */
#define MATCH_VALID    1    /* valid match */

/* pattern defines - the specific codes that IsValidUTF8Pattern() stores in
 * the integer pointed to by its ErrorCodePntr argument.  PATTERN_VALID is the
 * only success value; the negative values identify what is wrong. */
#define PATTERN_VALID  0    /* valid pattern */
#define PATTERN_ESC   -1    /* literal escape at end of pattern */
#define PATTERN_RANGE -2    /* malformed range in [..] construct */
#define PATTERN_CLOSE -3    /* no end bracket in [..] construct */
#define PATTERN_EMPTY -4    /* [..] construct is empty */
#define PATTERN_UTF8  -5    /* contains mangled UTF-8 characters */



/******************************************************************************
 * Match the pattern PATTERN against the string TEXT.
 *
 * MatchUTF8() returns TRUE if the pattern matches, FALSE otherwise.
 * MatchUTF8e() returns MATCH_VALID if the pattern matches, or otherwise one
 * of the following error codes:
 *
 * MATCH_PATTERN  - bad or malformed pattern.
 * MATCH_LITERAL  - match failed on character match (standard character).
 * MATCH_RANGE    - match failure on character range ([..] construct).
 * MATCH_ABORT    - premature end of text string (pattern longer than
 *                  text string).
 * MATCH_END      - premature end of pattern string (text longer than
 *                  what the pattern called for).
 * MATCH_VALID    - valid match using pattern.
 *
 * A match means the entire string TEXT is used up in matching.
 *
 * In the pattern string:
 *   * matches any sequence of characters (zero or more).
 *   ? matches any character.
 *   [SET] matches any character in the specified set,
 *   [!SET] or [^SET] matches any character not in the specified set.
 *
 *  A set is composed of characters or ranges; a range looks like 'character
 *  hyphen character' (as in 0-9 or A-Z).  [0-9A-Z_] is the set of all upper
 *  case English letters, digits and the underscore expressed as a concise []
 *  pattern construct (equivalent to [0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_]).
 *  Multibyte characters are allowed (i.e. UTF-8 Unicode characters) and are
 *  treated as a single character for matching purposes and range purposes.
 *
 *  To suppress the special syntactic significance of any of `[]*?!^-\',
 *  and match the character exactly, precede it with a `\'.
 */

/* Yes/no form: TRUE when Pattern matches all of Text, FALSE otherwise. */
BOOLEAN MatchUTF8 (const char *Pattern, const char *Text);

/* Detailed form: returns MATCH_VALID on a match, else one of the other
 * MATCH_* codes listed above describing why the match failed. */
int MatchUTF8e (const char *Pattern, const char *Text);



/******************************************************************************
 * Return TRUE if PATTERN contains any special wildcard characters, FALSE if
 * it does not.
 *
 * NOTE(review): presumably "special" means the wildcard syntax characters
 * described for MatchUTF8() (such as * ? [ and \) - confirm the exact set
 * against the implementation.
 */

BOOLEAN IsUTF8Pattern (const char *Pattern);



/******************************************************************************
 * Return TRUE if PATTERN is a well formed regular expression according
 * to the above syntax, FALSE otherwise.
 *
 * It also returns a more specific code in the integer pointed to by
 * ErrorCodePntr (pass NULL if you don't want the specific code).  The
 * specific error codes are:
 *
 *   PATTERN_VALID - pattern is well formed
 *   PATTERN_ESC   - pattern has invalid escape ('\' at end of pattern)
 *   PATTERN_RANGE - [..] construct has no end of range in a '-' pair
 *                   (i.e. [a-])
 *   PATTERN_CLOSE - [..] construct has no end bracket (i.e. [abc-g )
 *   PATTERN_EMPTY - [..] construct is empty (i.e. [])
 *   PATTERN_UTF8  - mangled UTF-8 encoding encountered in pattern; note that
 *                   mangled UTF-8 in the text will be considered to be equal
 *                   to the replacement character so that you can still match
 *                   it.
 */

BOOLEAN IsValidUTF8Pattern (const char *Pattern, int *ErrorCodePntr);

#endif /* MATCHUTF8__H */
